
import requests
from scrapy.selector import Selector
 
import re
import time

from db import MovieList

# HTTP headers sent with every listing request made by get_list().
# The User-Agent is a desktop Chrome string; presumably this keeps the site
# from rejecting the default python-requests client -- verify if requests fail.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
}


def _second_text(node, path):
    """Return the stripped second text node selected by *path*, or None.

    The hover-info divs are read by taking their second text node (index 1
    of ``getall()``). When fewer than two text nodes are present the value
    is treated as missing instead of raising IndexError.
    """
    texts = node.xpath(path).getall()
    if len(texts) < 2:
        return None
    return texts[1].strip()


def _parse_dd(dd):
    """Extract one row dict from a ``<dd>`` selector of the listing page.

    Every field that cannot be found is returned as None rather than
    raising, so that one incomplete entry does not abort the whole page.
    Rows are deliberately never skipped: get_list() uses the table's row
    count as the next page offset, so dropping an entry would shift every
    later page.

    Args:
        dd: a selector positioned on a single ``<dd>`` element.

    Returns:
        dict with the keys ``movie_id``, ``name``, ``score``, ``type``,
        ``actor`` and ``date``.
    """
    href = dd.xpath('./div[2]/a/@href').get()
    # First run of digits in the link is used as the movie id.
    id_match = re.search(r'\d+', href or '')
    movie_id = id_match.group() if id_match else None

    name = dd.xpath('./div[2]/a/text()').get()

    # The score is split across two <i> elements (integer and fraction
    # parts). Either may be absent, in which case .get() returns None.
    score_integer = dd.xpath('./div[3]/i[1]/text()').get()
    score_fraction = dd.xpath('./div[3]/i[2]/text()').get()
    score = ((score_integer or '') + (score_fraction or '')) or None

    # 'genre' locally to avoid shadowing the builtin type(); the stored
    # key is still 'type'.
    genre = _second_text(dd, './div[1]/div[2]/a/div/div[2]/text()')
    actor = _second_text(dd, './div[1]/div[2]/a/div/div[3]/text()')
    date = _second_text(dd, './div[1]/div[2]/a/div/div[4]/text()')

    return {
        'movie_id': movie_id,
        'name': name,
        'score': score,
        'type': genre,
        'actor': actor,
        'date': date,
    }


def get_list(timeout=10):
    """Fetch the next page of the film listing and store its rows.

    The number of rows already in ``movie_list`` is used as the ``offset``
    query parameter, so each call continues where the previous one stopped.

    Args:
        timeout: seconds to wait for the HTTP response. Without a timeout
            ``requests`` may block indefinitely on a stalled connection.

    Returns:
        True if at least one row was parsed and inserted, False if the page
        contained no ``<dd>`` entries.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            4xx/5xx response (via ``raise_for_status``), so that an error
            page is not mistaken for the end of the listing.
    """
    query = 'select count(*) c from movie_list'
    result = MovieList.raw(query)
    offset = result[0].c

    res = requests.get(
        f'https://www.maoyan.com/films?showType=3&sortId=3&offset={offset}',
        headers=headers,
        timeout=timeout,
    )
    res.raise_for_status()

    selector = Selector(text=res.text)
    rows = [_parse_dd(dd) for dd in selector.xpath('//dd')]
    if not rows:
        return False

    MovieList.insert_many(rows).execute()
    return True

def main():
    """Call get_list() repeatedly until it returns a falsy value.

    Sleeps two seconds after each successful page, then prints 'ok' once
    the loop ends.
    """
    while get_list():
        time.sleep(2)
    print('ok')

def test_db():
    """Insert two sample name-only rows through a single insert_many call."""
    sample_rows = [{'name': person} for person in ('张三', '李四')]
    MovieList.insert_many(sample_rows).execute()

# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
    # To insert two sample rows instead of crawling, call test_db() here.
